Load libraries and data.
library(dplyr)
library(ggplot2)
library(broom)
library(tidyr)
library(mosaic)
library(nlme)
library(lme4)
library(knitr)
load("movies2.RData")
pop_summary <- movies2 %>% group_by(genre) %>%
summarize(
mean = mean(rating),
sigma = sd(rating)) # call it sigma instead of sd
pop_summary # answer to a.
pop_diff <- pop_summary$mean[1] - pop_summary$mean[2]
pop_sigma <- sqrt(pop_summary$sigma[1]^2+ pop_summary$sigma[2]^2)
c(pop_diff, pop_sigma) # answer to b.
[1] -0.4177215 1.8998867
answer to c.: Normal distribution with mean pop_diff and standard deviation pop_sigma/N where N is a sample size.
set.seed(2007)
# answer to d.
movies2 %>% group_by(genre) %>%
sample_n(30) %>% summarize(mean = mean(rating), sd = sd(rating), n= n())
# answer to e.
my_movie_samples <- function(sample_size) {
movies2 %>% # test
group_by(genre) %>%
sample_n(sample_size) %>% # don't get confused with sample()
summarize( mean = mean(rating),
sd = sd(rating),
n = n())
}
# answer to f.
my_boot1 <- mosaic::do(100) * {
my_movie_samples(30)
}
reshape_movie_samples <- function(bt_samples) {
bt_samples %>% data.frame() %>% # don't forget to use data.frame()
dplyr::select(-.row) %>%
reshape(idvar= ".index", timevar="genre",
direction="wide") %>%
mutate(bt_diff = (mean.Action - mean.Romance))
}
reshaped_my_boot1 <- reshape_movie_samples(my_boot1)
density_sample_movies <- function(rehsaped_samples, N, B) {
rehsaped_samples %>%
ggplot(aes(x = bt_diff)) +
geom_density(fill = "steelblue", adjust = 2, alpha = .75) + xlim(c(-2, 2) + pop_diff) +
geom_vline(xintercept = mean(rehsaped_samples$bt_diff), color = "steelblue", size = 1) +
geom_vline(xintercept = pop_diff, color = "yellow", size = 1) + # CTL prediction mean
stat_function(fun = dnorm, colour = "yellow", size =1, # CTL prediction distribution
args = list(mean = pop_diff,
sd = pop_sigma/sqrt(rehsaped_samples$n.Action[1]))) +
labs(title = paste0("Bootstrop: ", B, ", Num observations:", N ))
}
stats_sample_movies <- function(reshaped_samples) {
reshaped_samples %>%
summarize(
diff_mean = mean(bt_diff),
diff_sd = sd(bt_diff),
p_val = sum(bt_diff>0)/length(bt_diff)*2,
theory_mean = pop_diff,
theory_sd = pop_sigma/sqrt(length(bt_diff)),
abs_error_mean = abs(diff_mean - theory_mean),
abs_error_sd = abs(diff_sd - theory_sd)
)
}
# answer to g.
density_sample_movies(reshaped_my_boot1, 30, 100)
stats_sample_movies(reshaped_my_boot1)
loc_N <- c(20, 30, 40, 50, 75, 100)
loc_B <- c(100, 500, 1000, 2000, 4000)
list_density <- list()
list_stats <- list()
# This will take some time
for (idx_N in 1:length(loc_N)) {
list_density[[idx_N]] <- list()
list_stats[[idx_N]] <- list()
for (idx_B in 1:length(loc_B)) {
print(paste0('N =', loc_N[idx_N],', B = ', loc_B[idx_B]))
my_boot1 <- mosaic::do(loc_B[idx_B]) * {
my_movie_samples(loc_N[idx_N])
}
reshaped_my_boot1 <- reshape_movie_samples(my_boot1)
list_density[[idx_N]][[idx_B]] <- density_sample_movies(reshaped_my_boot1, loc_N[idx_N], loc_B[idx_B])
list_stats[[idx_N]][[idx_B]] <- stats_sample_movies(reshaped_my_boot1)
}
}
[1] "N =20, B = 100"
[1] "N =20, B = 500"
[1] "N =20, B = 1000"
[1] "N =20, B = 2000"
[1] "N =20, B = 4000"
[1] "N =30, B = 100"
[1] "N =30, B = 500"
[1] "N =30, B = 1000"
[1] "N =30, B = 2000"
[1] "N =30, B = 4000"
[1] "N =40, B = 100"
[1] "N =40, B = 500"
[1] "N =40, B = 1000"
[1] "N =40, B = 2000"
[1] "N =40, B = 4000"
[1] "N =50, B = 100"
[1] "N =50, B = 500"
[1] "N =50, B = 1000"
[1] "N =50, B = 2000"
[1] "N =50, B = 4000"
[1] "N =75, B = 100"
[1] "N =75, B = 500"
[1] "N =75, B = 1000"
[1] "N =75, B = 2000"
[1] "N =75, B = 4000"
[1] "N =100, B = 100"
[1] "N =100, B = 500"
[1] "N =100, B = 1000"
[1] "N =100, B = 2000"
[1] "N =100, B = 4000"
# Use Plot Pane in RStudio <- -> to observe the influence of N
for (idx_N in 1:length(loc_N)) print(list_density[[idx_N]][[which(loc_B==max(loc_B))]])
# dispersion decreases with N
for (idx_N in 1:length(loc_N)) print(list_density[[idx_N]][[which(loc_B==min(loc_B))]])
extract_list_stats_N <- function(seq, idx_B, stat) {
lapply(c(1:length(seq)),
function (idx_N) list_stats[[idx_N]][[idx_B]][[stat]]) %>% unlist()
}
extract_list_stats_B <- function(seq, idx_N, stat) {
lapply(c(1:length(seq)),
function (idx_B) list_stats[[idx_N]][[idx_B]][[stat]]) %>% unlist()
}
max_B <- which(loc_B==max(loc_B)) # index of max B
max_N <- which(loc_N==max(loc_N)) # index of max N
results_N <- data.frame(
N = loc_N,
p_val = extract_list_stats_N(loc_N, max_B, "p_val"),
abs_error_mean = extract_list_stats_N(loc_N, max_B, "abs_error_mean"),
abs_error_sd = extract_list_stats_N(loc_N, max_B, "abs_error_sd")
)
results_B <- data.frame(
B = loc_B,
p_val = extract_list_stats_B(loc_B, max_N, "p_val"),
abs_error_mean = extract_list_stats_B(loc_B, max_N, "abs_error_mean"),
abs_error_sd = extract_list_stats_B(loc_B, max_N, "abs_error_sd")
)
# answer to e.
results_N %>% # proportional to 1/sqrt(N)
ggplot(aes(x = N, y = p_val)) + geom_point() +
geom_smooth(method = "lm", formula = y ~ sqrt(1/x), se=FALSE)
results_N %>% # already unbiased estimator (each mean gets more accurate with N but the mean of means do not)
ggplot(aes(x = N, y = abs_error_mean)) + geom_point() +
geom_smooth(method = "lm", formula = y ~ sqrt(1/x), se=FALSE)
results_N %>% # proportional to 1/sqrt(N)
ggplot(aes(x = N, y = abs_error_sd)) + geom_point() +
geom_smooth(method = "lm", formula = y ~ sqrt(1/x), se=FALSE)
# answer to f.
# slowly converges to some lower or upper bounds for given N, not following 1/sqrt(N) function
results_B %>%
ggplot(aes(x = B, y = p_val)) + geom_point() +
geom_smooth(method = "lm", formula = y ~ sqrt(1/x), se=FALSE)
results_B %>%
ggplot(aes(x = B, y = abs_error_mean)) + geom_point() +
geom_smooth(method = "lm", formula = y ~ sqrt(1/x), se=FALSE)
results_B %>%
ggplot(aes(x = B, y = abs_error_sd)) + geom_point() +
geom_smooth(method = "lm", formula = y ~ sqrt(1/x), se=FALSE)
ChickWeight2 <- ChickWeight # make a copy that we may modify
# answers to a through d.
lm( weight ~ Diet + Time + I(Time^2) , data = ChickWeight2) %>% summary()
Call:
lm(formula = weight ~ Diet + Time + I(Time^2), data = ChickWeight2)
Residuals:
Min 1Q Median 3Q Max
-146.372 -16.983 -0.684 15.283 132.295
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 21.56559 4.18810 5.149 3.60e-07 ***
Diet2 16.08443 4.02910 3.992 7.40e-05 ***
Diet3 36.41776 4.02910 9.039 < 2e-16 ***
Diet4 30.28713 4.05041 7.478 2.86e-13 ***
Time 5.42189 0.83037 6.530 1.46e-10 ***
I(Time^2) 0.15615 0.03758 4.155 3.74e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 35.49 on 572 degrees of freedom
Multiple R-squared: 0.7528, Adjusted R-squared: 0.7506
F-statistic: 348.3 on 5 and 572 DF, p-value: < 2.2e-16
lm( weight ~ Diet + Time + I(Time^2) + Chick, data = ChickWeight2) %>% summary()
Call:
lm(formula = weight ~ Diet + Time + I(Time^2) + Chick, data = ChickWeight2)
Residuals:
Min 1Q Median 3Q Max
-89.323 -14.177 -0.719 14.127 82.761
Coefficients: (3 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 187.95907 56.48103 3.328 0.000937 ***
Diet2 -111.80410 33.76427 -3.311 0.000992 ***
Diet3 158.02561 151.28543 1.045 0.296710
Diet4 -795.65043 364.56383 -2.182 0.029516 *
Time 5.49744 0.64936 8.466 2.54e-16 ***
I(Time^2) 0.15092 0.02937 5.138 3.91e-07 ***
Chick.L 1536.03638 616.23832 2.493 0.012988 *
Chick.Q 1132.76640 633.68151 1.788 0.074417 .
Chick.C 701.07385 414.17189 1.693 0.091102 .
Chick^4 48.78857 77.52011 0.629 0.529382
Chick^5 -652.87056 344.64786 -1.894 0.058732 .
Chick^6 -617.94097 321.03777 -1.925 0.054790 .
Chick^7 -42.42155 21.07590 -2.013 0.044645 *
Chick^8 469.78475 223.49896 2.102 0.036032 *
Chick^9 383.05395 202.41855 1.892 0.058988 .
Chick^10 18.20186 50.35810 0.361 0.717909
Chick^11 -183.80024 93.53871 -1.965 0.049944 *
Chick^12 -189.58610 113.30433 -1.673 0.094873 .
Chick^13 -119.70805 75.00316 -1.596 0.111080
Chick^14 -22.65442 16.42693 -1.379 0.168449
Chick^15 93.64822 57.69960 1.623 0.105182
Chick^16 172.81090 100.87837 1.713 0.087290 .
Chick^17 151.42028 82.48806 1.836 0.066972 .
Chick^18 7.38371 21.39654 0.345 0.730165
Chick^19 -210.88545 106.69882 -1.976 0.048625 *
Chick^20 -208.42605 108.68477 -1.918 0.055689 .
Chick^21 -22.62376 17.08739 -1.324 0.186077
Chick^22 151.86556 77.69090 1.955 0.051143 .
Chick^23 162.98077 86.46808 1.885 0.059999 .
Chick^24 65.91731 43.69643 1.509 0.132020
Chick^25 -38.14827 14.09694 -2.706 0.007028 **
Chick^26 -38.66113 38.65533 -1.000 0.317698
Chick^27 -87.80072 58.36564 -1.504 0.133099
Chick^28 -103.28557 57.51292 -1.796 0.073089 .
Chick^29 -26.05045 18.29960 -1.424 0.155169
Chick^30 81.79350 48.56642 1.684 0.092744 .
Chick^31 164.74431 89.13544 1.848 0.065128 .
Chick^32 11.45557 9.15985 1.251 0.211626
Chick^33 62.49349 30.62838 2.040 0.041811 *
Chick^34 90.81597 45.86262 1.980 0.048205 *
Chick^35 18.49639 11.65825 1.587 0.113216
Chick^36 44.91319 29.41438 1.527 0.127384
Chick^37 -28.65947 19.38399 -1.479 0.139869
Chick^38 101.27934 58.13177 1.742 0.082051 .
Chick^39 -198.63652 105.99375 -1.874 0.061479 .
Chick^40 -100.96158 67.80076 -1.489 0.137062
Chick^41 97.05775 56.70464 1.712 0.087553 .
Chick^42 -54.84619 21.76126 -2.520 0.012018 *
Chick^43 -98.99305 38.04020 -2.602 0.009520 **
Chick^44 62.56038 28.47294 2.197 0.028442 *
Chick^45 -106.60378 51.64156 -2.064 0.039479 *
Chick^46 -14.60983 15.89201 -0.919 0.358350
Chick^47 NA NA NA NA
Chick^48 NA NA NA NA
Chick^49 NA NA NA NA
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 27.62 on 526 degrees of freedom
Multiple R-squared: 0.8623, Adjusted R-squared: 0.8489
F-statistic: 64.58 on 51 and 526 DF, p-value: < 2.2e-16
lmer( weight ~ Diet + Time + I(Time^2) + (1 | Chick), data = ChickWeight2) %>% summary()
Linear mixed model fit by REML ['lmerMod']
Formula: weight ~ Diet + Time + I(Time^2) + (1 | Chick)
Data: ChickWeight2
REML criterion at convergence: 5563
Scaled residuals:
Min 1Q Median 3Q Max
-3.4591 -0.5216 -0.0029 0.4874 3.1903
Random effects:
Groups Name Variance Std.Dev.
Chick (Intercept) 523.8 22.89
Residual 762.5 27.61
Number of obs: 578, groups: Chick, 50
Fixed effects:
Estimate Std. Error t value
(Intercept) 21.46154 6.08152 3.529
Diet2 16.26072 9.42627 1.725
Diet3 36.59405 9.42627 3.882
Diet4 30.21469 9.43254 3.203
Time 5.47872 0.64791 8.456
I(Time^2) 0.15195 0.02932 5.183
Correlation of Fixed Effects:
(Intr) Diet2 Diet3 Diet4 Time
Diet2 -0.521
Diet3 -0.521 0.339
Diet4 -0.520 0.339 0.339
Time -0.388 -0.005 -0.005 -0.007
I(Time^2) 0.324 0.001 0.001 0.004 -0.964
lmer( weight ~ (1 | Diet) + Time + I(Time^2) + (1 | Chick), data = ChickWeight2) %>% summary()
Linear mixed model fit by REML ['lmerMod']
Formula: weight ~ (1 | Diet) + Time + I(Time^2) + (1 | Chick)
Data: ChickWeight2
REML criterion at convergence: 5589.7
Scaled residuals:
Min 1Q Median 3Q Max
-3.4632 -0.5206 -0.0080 0.4783 3.2033
Random effects:
Groups Name Variance Std.Dev.
Chick (Intercept) 522.7 22.86
Diet (Intercept) 229.5 15.15
Residual 762.5 27.61
Number of obs: 578, groups: Chick, 50; Diet, 4
Fixed effects:
Estimate Std. Error t value
(Intercept) 41.65018 8.79649 4.735
Time 5.48149 0.64790 8.460
I(Time^2) 0.15190 0.02932 5.181
Correlation of Fixed Effects:
(Intr) Time
Time -0.273
I(Time^2) 0.226 -0.964
# answers to e through g.
lm( weight ~ Diet*Time - Diet, data = ChickWeight2) %>% summary()
Call:
lm(formula = weight ~ Diet * Time - Diet, data = ChickWeight2)
Residuals:
Min 1Q Median 3Q Max
-135.727 -15.696 -0.928 13.141 129.109
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 27.8588 2.6596 10.475 < 2e-16 ***
Time 7.0492 0.2573 27.392 < 2e-16 ***
Diet2:Time 1.6112 0.3044 5.294 1.71e-07 ***
Diet3:Time 3.7383 0.3044 12.282 < 2e-16 ***
Diet4:Time 2.8614 0.3086 9.273 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 34.08 on 573 degrees of freedom
Multiple R-squared: 0.7717, Adjusted R-squared: 0.7701
F-statistic: 484.1 on 4 and 573 DF, p-value: < 2.2e-16
lm( weight ~ Diet*Time - Diet + Chick, data = ChickWeight2) %>% summary()
Call:
lm(formula = weight ~ Diet * Time - Diet + Chick, data = ChickWeight2)
Residuals:
Min 1Q Median 3Q Max
-79.06 -14.78 -1.71 14.71 87.54
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 28.2445 1.9880 14.207 < 2e-16 ***
Time 6.6906 0.2598 25.752 < 2e-16 ***
Chick.L 25.6467 13.8456 1.852 0.064541 .
Chick.Q -13.5654 12.4737 -1.088 0.277306
Chick.C 51.9446 11.7779 4.410 1.25e-05 ***
Chick^4 9.2225 11.3481 0.813 0.416764
Chick^5 -33.3384 10.2390 -3.256 0.001203 **
Chick^6 41.0952 11.1258 3.694 0.000244 ***
Chick^7 -0.2107 9.0451 -0.023 0.981424
Chick^8 5.8962 10.1114 0.583 0.560059
Chick^9 13.4345 8.9387 1.503 0.133453
Chick^10 -16.1181 8.8059 -1.830 0.067763 .
Chick^11 -45.0382 8.3171 -5.415 9.34e-08 ***
Chick^12 9.6177 8.1713 1.177 0.239727
Chick^13 55.0050 7.9173 6.947 1.11e-11 ***
Chick^14 8.1415 7.7757 1.047 0.295562
Chick^15 -45.9058 7.7621 -5.914 6.03e-09 ***
Chick^16 -15.9530 7.7400 -2.061 0.039784 *
Chick^17 28.2005 7.7793 3.625 0.000317 ***
Chick^18 17.2879 7.6943 2.247 0.025065 *
Chick^19 -24.3940 7.6489 -3.189 0.001512 **
Chick^20 13.0230 7.7369 1.683 0.092924 .
Chick^21 12.9952 7.4552 1.743 0.081901 .
Chick^22 -10.4667 7.6565 -1.367 0.172202
Chick^23 3.4996 7.4382 0.470 0.638199
Chick^24 3.9263 7.4620 0.526 0.598990
Chick^25 -35.2223 7.3733 -4.777 2.31e-06 ***
Chick^26 20.8035 7.4297 2.800 0.005298 **
Chick^27 41.5754 7.3843 5.630 2.94e-08 ***
Chick^28 8.9789 7.3614 1.220 0.223121
Chick^29 -22.6530 7.3736 -3.072 0.002236 **
Chick^30 -7.0641 7.3921 -0.956 0.339699
Chick^31 11.6522 7.4664 1.561 0.119215
Chick^32 16.4033 7.3314 2.237 0.025680 *
Chick^33 -3.1074 7.3950 -0.420 0.674512
Chick^34 -6.9050 7.4016 -0.933 0.351297
Chick^35 0.5745 7.3249 0.078 0.937516
Chick^36 -16.0760 7.3734 -2.180 0.029681 *
Chick^37 -4.2042 7.3449 -0.572 0.567300
Chick^38 -12.9041 7.3685 -1.751 0.080489 .
Chick^39 -11.6780 7.4907 -1.559 0.119601
Chick^40 42.2806 7.4377 5.685 2.18e-08 ***
Chick^41 -13.5917 7.4133 -1.833 0.067307 .
Chick^42 -32.8739 7.3684 -4.461 9.97e-06 ***
Chick^43 -38.4670 7.4214 -5.183 3.12e-07 ***
Chick^44 12.3700 7.4307 1.665 0.096565 .
Chick^45 -20.2123 7.4505 -2.713 0.006889 **
Chick^46 23.0112 7.3715 3.122 0.001898 **
Chick^47 -11.3521 7.4007 -1.534 0.125651
Chick^48 -16.5302 7.3528 -2.248 0.024981 *
Chick^49 -31.1805 7.3562 -4.239 2.66e-05 ***
Diet2:Time 1.9185 0.4294 4.468 9.67e-06 ***
Diet3:Time 4.7322 0.4294 11.022 < 2e-16 ***
Diet4:Time 2.9653 0.4350 6.817 2.57e-11 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 25.37 on 524 degrees of freedom
Multiple R-squared: 0.8843, Adjusted R-squared: 0.8726
F-statistic: 75.54 on 53 and 524 DF, p-value: < 2.2e-16
lmer( weight ~ Diet*Time - Diet + (1 | Chick), data = ChickWeight2) %>% summary()
Linear mixed model fit by REML ['lmerMod']
Formula: weight ~ Diet * Time - Diet + (1 | Chick)
Data: ChickWeight2
REML criterion at convergence: 5488
Scaled residuals:
Min 1Q Median 3Q Max
-3.3226 -0.5908 -0.0699 0.5435 3.5918
Random effects:
Groups Name Variance Std.Dev.
Chick (Intercept) 532.9 23.09
Residual 643.0 25.36
Number of obs: 578, groups: Chick, 50
Fixed effects:
Estimate Std. Error t value
(Intercept) 28.1852 3.8206 7.377
Time 6.7729 0.2435 27.818
Diet2:Time 1.8442 0.3857 4.782
Diet3:Time 4.4756 0.3857 11.605
Diet4:Time 2.9418 0.3906 7.532
Correlation of Fixed Effects:
(Intr) Time Dt2:Tm Dt3:Tm
Time -0.287
Diet2:Time 0.008 -0.581
Diet3:Time 0.008 -0.581 0.366
Diet4:Time 0.004 -0.573 0.361 0.361
# answer to h.
lm( weight ~ Diet*Time + Diet*I(Time^2) - Diet, data = ChickWeight2) %>% summary()
Call:
lm(formula = weight ~ Diet * Time + Diet * I(Time^2) - Diet,
data = ChickWeight2)
Residuals:
Min 1Q Median 3Q Max
-143.223 -10.075 -0.585 8.274 123.360
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 38.05700 3.55089 10.718 < 2e-16 ***
Time 4.52849 0.97255 4.656 4.01e-06 ***
I(Time^2) 0.10994 0.04881 2.253 0.0247 *
Diet2:Time 1.21418 1.22829 0.989 0.3233
Diet3:Time 0.66645 1.22829 0.543 0.5876
Diet4:Time 3.05116 1.23514 2.470 0.0138 *
Diet2:I(Time^2) 0.02287 0.07087 0.323 0.7470
Diet3:I(Time^2) 0.18123 0.07087 2.557 0.0108 *
Diet4:I(Time^2) -0.01139 0.07167 -0.159 0.8737
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 33.44 on 569 degrees of freedom
Multiple R-squared: 0.7817, Adjusted R-squared: 0.7786
F-statistic: 254.6 on 8 and 569 DF, p-value: < 2.2e-16
lm( weight ~ Diet*Time + Diet*I(Time^2) - Diet + Chick, data = ChickWeight2) %>% summary()
Call:
lm(formula = weight ~ Diet * Time + Diet * I(Time^2) - Diet +
Chick, data = ChickWeight2)
Residuals:
Min 1Q Median 3Q Max
-95.502 -13.076 0.203 13.994 81.077
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 37.98665 2.60577 14.578 < 2e-16 ***
Time 4.50263 0.93250 4.829 1.81e-06 ***
I(Time^2) 0.10337 0.04243 2.436 0.01517 *
Chick.L 37.36872 17.82733 2.096 0.03655 *
Chick.Q -30.14575 15.13162 -1.992 0.04687 *
Chick.C 31.64251 13.38520 2.364 0.01845 *
Chick^4 4.75804 12.70793 0.374 0.70825
Chick^5 -17.87169 10.88152 -1.642 0.10111
Chick^6 51.99355 12.63730 4.114 4.51e-05 ***
Chick^7 1.92716 8.78650 0.219 0.82648
Chick^8 -4.66427 11.15130 -0.418 0.67592
Chick^9 7.42417 9.07255 0.818 0.41355
Chick^10 -17.23033 8.94144 -1.927 0.05452 .
Chick^11 -38.67025 8.19412 -4.719 3.05e-06 ***
Chick^12 10.91829 8.18744 1.334 0.18294
Chick^13 55.44602 7.78830 7.119 3.63e-12 ***
Chick^14 8.36440 7.53577 1.110 0.26753
Chick^15 -44.59191 7.58249 -5.881 7.31e-09 ***
Chick^16 -18.73044 7.58426 -2.470 0.01384 *
Chick^17 23.43568 7.65636 3.061 0.00232 **
Chick^18 17.04334 7.54211 2.260 0.02425 *
Chick^19 -20.10368 7.55775 -2.660 0.00806 **
Chick^20 17.28227 7.78841 2.219 0.02692 *
Chick^21 13.06748 7.21141 1.812 0.07055 .
Chick^22 -13.46959 7.66728 -1.757 0.07955 .
Chick^23 0.51112 7.27698 0.070 0.94403
Chick^24 3.60122 7.35660 0.490 0.62468
Chick^25 -34.28977 7.14904 -4.796 2.11e-06 ***
Chick^26 21.45460 7.24988 2.959 0.00322 **
Chick^27 41.74158 7.18159 5.812 1.08e-08 ***
Chick^28 10.96163 7.15924 1.531 0.12635
Chick^29 -20.72739 7.16074 -2.895 0.00396 **
Chick^30 -8.31765 7.18100 -1.158 0.24728
Chick^31 8.05395 7.34475 1.097 0.27334
Chick^32 16.20988 7.09435 2.285 0.02272 *
Chick^33 -4.34289 7.22026 -0.601 0.54778
Chick^34 -8.54630 7.24031 -1.180 0.23839
Chick^35 0.38663 7.08472 0.055 0.95650
Chick^36 -17.27562 7.18588 -2.404 0.01656 *
Chick^37 -3.80682 7.12368 -0.534 0.59330
Chick^38 -14.79768 7.17399 -2.063 0.03964 *
Chick^39 -7.92055 7.39232 -1.071 0.28446
Chick^40 42.62529 7.25733 5.873 7.62e-09 ***
Chick^41 -14.87047 7.21112 -2.062 0.03969 *
Chick^42 -31.66806 7.14938 -4.429 1.15e-05 ***
Chick^43 -36.08786 7.28236 -4.956 9.77e-07 ***
Chick^44 10.82116 7.29652 1.483 0.13867
Chick^45 -17.17002 7.34838 -2.337 0.01984 *
Chick^46 23.35888 7.18171 3.253 0.00122 **
Chick^47 -10.46481 7.20117 -1.453 0.14677
Chick^48 -16.90634 7.14240 -2.367 0.01830 *
Chick^49 -30.23053 7.14858 -4.229 2.78e-05 ***
Diet2:Time 1.30757 1.57426 0.831 0.40658
Diet3:Time 0.55489 1.57426 0.352 0.72462
Diet4:Time 3.38747 1.58261 2.140 0.03278 *
Diet2:I(Time^2) 0.02692 0.07106 0.379 0.70495
Diet3:I(Time^2) 0.19293 0.07106 2.715 0.00684 **
Diet4:I(Time^2) -0.02033 0.07186 -0.283 0.77736
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 24.54 on 520 degrees of freedom
Multiple R-squared: 0.8926, Adjusted R-squared: 0.8808
F-statistic: 75.8 on 57 and 520 DF, p-value: < 2.2e-16
lmer( weight ~ Diet*Time + Diet*I(Time^2) - Diet + (1 | Chick), data = ChickWeight2) %>% summary()
Linear mixed model fit by REML ['lmerMod']
Formula: weight ~ Diet * Time + Diet * I(Time^2) - Diet + (1 | Chick)
Data: ChickWeight2
REML criterion at convergence: 5463.7
Scaled residuals:
Min 1Q Median 3Q Max
-4.0129 -0.5185 -0.0110 0.5288 3.4598
Random effects:
Groups Name Variance Std.Dev.
Chick (Intercept) 522.4 22.86
Residual 600.6 24.51
Number of obs: 578, groups: Chick, 50
Fixed effects:
Estimate Std. Error t value
(Intercept) 37.99451 4.14966 9.156
Time 4.53465 0.85256 5.319
I(Time^2) 0.10316 0.03995 2.583
Diet2:Time 1.25349 1.34921 0.929
Diet3:Time 0.58107 1.34921 0.431
Diet4:Time 3.25969 1.35625 2.403
Diet2:I(Time^2) 0.02795 0.06420 0.435
Diet3:I(Time^2) 0.19097 0.06420 2.975
Diet4:I(Time^2) -0.01613 0.06493 -0.248
Correlation of Fixed Effects:
(Intr) Time I(T^2) Dt2:Tm Dt3:Tm Dt4:Tm D2:I(T D3:I(T
Time -0.349
I(Time^2) 0.281 -0.961
Diet2:Time 0.005 -0.557 0.546
Diet3:Time 0.005 -0.557 0.546 0.351
Diet4:Time 0.003 -0.553 0.543 0.349 0.349
Dt2:I(Tm^2) -0.005 0.539 -0.575 -0.961 -0.339 -0.337
Dt3:I(Tm^2) -0.005 0.539 -0.575 -0.339 -0.961 -0.337 0.357
Dt4:I(Tm^2) -0.003 0.532 -0.567 -0.335 -0.335 -0.960 0.353 0.353
# answer to i.
lm( weight ~ Diet*Time + Diet*I(Time^2), data = ChickWeight2) %>% summary()
Call:
lm(formula = weight ~ Diet * Time + Diet * I(Time^2), data = ChickWeight2)
Residuals:
Min 1Q Median 3Q Max
-143.152 -10.030 -0.732 8.232 123.298
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 38.36142 5.65497 6.784 2.96e-11 ***
Diet2 -0.68130 9.74243 -0.070 0.944274
Diet3 0.46254 9.74243 0.047 0.962150
Diet4 -1.29627 9.75110 -0.133 0.894292
Time 4.47324 1.25962 3.551 0.000415 ***
I(Time^2) 0.11202 0.05742 1.951 0.051574 .
Diet2:Time 1.33695 2.14254 0.624 0.532877
Diet3:Time 0.58427 2.14254 0.273 0.785182
Diet4:Time 3.28497 2.15223 1.526 0.127491
Diet2:I(Time^2) 0.01827 0.09677 0.189 0.850357
Diet3:I(Time^2) 0.18428 0.09677 1.904 0.057375 .
Diet4:I(Time^2) -0.02018 0.09770 -0.207 0.836412
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 33.53 on 566 degrees of freedom
Multiple R-squared: 0.7817, Adjusted R-squared: 0.7774
F-statistic: 184.2 on 11 and 566 DF, p-value: < 2.2e-16
lm( weight ~ Diet*Time + Diet*I(Time^2) + Chick, data = ChickWeight2) %>% summary()
Call:
lm(formula = weight ~ Diet * Time + Diet * I(Time^2) + Chick,
data = ChickWeight2)
Residuals:
Min 1Q Median 3Q Max
-95.502 -13.076 0.203 13.994 81.077
Coefficients: (3 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 203.51819 50.26262 4.049 5.92e-05 ***
Diet2 -131.25205 30.71693 -4.273 2.29e-05 ***
Diet3 110.03466 134.55050 0.818 0.41385
Diet4 -806.44028 323.91501 -2.490 0.01310 *
Time 4.50263 0.93250 4.829 1.81e-06 ***
I(Time^2) 0.10337 0.04243 2.436 0.01517 *
Chick.L 1510.14555 547.40265 2.759 0.00601 **
Chick.Q 1079.33805 562.91629 1.917 0.05573 .
Chick.C 681.83527 367.91322 1.853 0.06441 .
Chick^4 41.93592 68.86579 0.609 0.54282
Chick^5 -622.22819 306.15872 -2.032 0.04262 *
Chick^6 -602.42706 285.18224 -2.112 0.03513 *
Chick^7 -37.50674 18.73521 -2.002 0.04581 *
Chick^8 453.77470 198.53442 2.286 0.02268 *
Chick^9 369.73353 179.81520 2.056 0.04026 *
Chick^10 15.87902 44.73338 0.355 0.72276
Chick^11 -178.73381 83.08895 -2.151 0.03193 *
Chick^12 -180.56081 100.65371 -1.794 0.07341 .
Chick^13 -116.23572 66.62436 -1.745 0.08164 .
Chick^14 -21.30554 14.59568 -1.460 0.14497
Chick^15 91.08619 51.25441 1.777 0.07613 .
Chick^16 164.76329 89.61254 1.839 0.06654 .
Chick^17 147.22288 73.27457 2.009 0.04503 *
Chick^18 6.51372 19.00634 0.343 0.73195
Chick^19 -202.23703 94.78306 -2.134 0.03334 *
Chick^20 -202.52149 96.54484 -2.098 0.03641 *
Chick^21 -20.72148 15.18059 -1.365 0.17284
Chick^22 146.54649 69.01324 2.123 0.03419 *
Chick^23 157.94724 76.80975 2.056 0.04025 *
Chick^24 62.27779 38.81705 1.604 0.10923
Chick^25 -36.73531 12.52312 -2.933 0.00350 **
Chick^26 -37.15069 34.33685 -1.082 0.27978
Chick^27 -83.67071 51.84692 -1.614 0.10718
Chick^28 -99.79016 51.08909 -1.953 0.05132 .
Chick^29 -24.94711 16.25626 -1.535 0.12549
Chick^30 79.16049 43.14119 1.835 0.06709 .
Chick^31 158.17954 79.18053 1.998 0.04627 *
Chick^32 11.89363 8.13659 1.462 0.14441
Chick^33 61.20157 27.20668 2.250 0.02490 *
Chick^34 88.33438 40.73952 2.168 0.03059 *
Chick^35 17.91978 10.35596 1.730 0.08415 .
Chick^36 43.39350 26.12856 1.661 0.09736 .
Chick^37 -27.38755 17.21889 -1.591 0.11232
Chick^38 97.37421 51.63902 1.886 0.05990 .
Chick^39 -191.88665 94.15465 -2.038 0.04206 *
Chick^40 -96.82263 60.22739 -1.608 0.10853
Chick^41 93.81590 50.37049 1.863 0.06309 .
Chick^42 -53.24028 19.33091 -2.754 0.00609 **
Chick^43 -97.01193 33.79076 -2.871 0.00426 **
Chick^44 60.91741 25.29244 2.409 0.01636 *
Chick^45 -103.42280 45.87333 -2.255 0.02458 *
Chick^46 -13.99627 14.11657 -0.991 0.32191
Chick^47 NA NA NA NA
Chick^48 NA NA NA NA
Chick^49 NA NA NA NA
Diet2:Time 1.30757 1.57426 0.831 0.40658
Diet3:Time 0.55489 1.57426 0.352 0.72462
Diet4:Time 3.38747 1.58261 2.140 0.03278 *
Diet2:I(Time^2) 0.02692 0.07106 0.379 0.70495
Diet3:I(Time^2) 0.19293 0.07106 2.715 0.00684 **
Diet4:I(Time^2) -0.02033 0.07186 -0.283 0.77736
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 24.54 on 520 degrees of freedom
Multiple R-squared: 0.8926, Adjusted R-squared: 0.8808
F-statistic: 75.8 on 57 and 520 DF, p-value: < 2.2e-16
lmer( weight ~ Diet*Time + Diet*I(Time^2) + (1 | Chick), data = ChickWeight2) %>% summary()
Linear mixed model fit by REML ['lmerMod']
Formula: weight ~ Diet * Time + Diet * I(Time^2) + (1 | Chick)
Data: ChickWeight2
REML criterion at convergence: 5443.8
Scaled residuals:
Min 1Q Median 3Q Max
-4.0069 -0.5199 -0.0103 0.5290 3.4494
Random effects:
Groups Name Variance Std.Dev.
Chick (Intercept) 547.0 23.39
Residual 601.7 24.53
Number of obs: 578, groups: Chick, 50
Fixed effects:
Estimate Std. Error t value
(Intercept) 38.31971 6.66873 5.746
Diet2 -0.63959 11.52654 -0.055
Diet3 0.50425 11.52654 0.044
Diet4 -1.48943 11.53109 -0.129
Time 4.51117 0.92859 4.858
I(Time^2) 0.10401 0.04230 2.459
Diet2:Time 1.29903 1.57162 0.827
Diet3:Time 0.54635 1.57162 0.348
Diet4:Time 3.36592 1.57985 2.131
Diet2:I(Time^2) 0.02628 0.07097 0.370
Diet3:I(Time^2) 0.19229 0.07097 2.710
Diet4:I(Time^2) -0.02010 0.07176 -0.280
Correlation of Fixed Effects:
(Intr) Diet2 Diet3 Diet4 Time I(T^2) Dt2:Tm Dt3:Tm Dt4:Tm D2:I(T D3:I(T
Diet2 -0.579
Diet3 -0.579 0.335
Diet4 -0.578 0.335 0.335
Time -0.501 0.290 0.290 0.290
I(Time^2) 0.415 -0.240 -0.240 -0.240 -0.963
Diet2:Time 0.296 -0.504 -0.171 -0.171 -0.591 0.569
Diet3:Time 0.296 -0.171 -0.504 -0.171 -0.591 0.569 0.349
Diet4:Time 0.295 -0.170 -0.170 -0.505 -0.588 0.566 0.347 0.347
Dt2:I(Tm^2) -0.247 0.419 0.143 0.143 0.574 -0.596 -0.965 -0.339 -0.337
Dt3:I(Tm^2) -0.247 0.143 0.419 0.143 0.574 -0.596 -0.339 -0.965 -0.337 0.355
Dt4:I(Tm^2) -0.244 0.141 0.141 0.418 0.568 -0.589 -0.335 -0.335 -0.964 0.351 0.351
# answer to j.
# default smooth fit
ChickWeight2 %>%
ggplot(aes(x = Time, y = weight, color = Diet)) +
geom_jitter(size =.75, alpha=.5) + geom_smooth()
# linear fit
ChickWeight2 %>%
ggplot(aes(x = Time, y = weight, color = Diet)) +
geom_jitter(size =.75, alpha=.5) + geom_smooth(method = "lm", formula = y ~ x)
# quadratic fit
ChickWeight2 %>%
ggplot(aes(x = Time, y = weight, color = Diet)) +
geom_jitter(size =.75, alpha=.5) + geom_smooth(method = "lm", formula = y ~ x + I(x^2))
coeff sd tstat
Intercept 27.858834 2.5823722 10.788078
Time 7.049161 0.2557433 27.563426
Diet2.Time 1.611209 0.3048792 5.284746
Diet3.Time 3.738317 0.3054171 12.240038
Diet4.Time 2.861438 0.3078737 9.294191
# function generating a matrix of ones
ones <- function(r,c) matrix(c(rep(1,r*c)),nrow=r,ncol=c)
# confidence interval by bootstrapping
# version 1
ci_ver1 <- function(est_bt, alpha = 0.05) {
# est_bt: bootstrap estimates with row = boot replications, col = coefficients
apply(est_bt, 2, function(x) quantile(x, c(alpha/2, 1 - alpha/2))) %>% t()
}
# version 2
ci_ver2 <- function(est_sample, bt_sd, alpha = 0.05) {
# est_semple: sample estimate vector
# bt_sd: bootstrap sd estimates of est_sample-vector
cbind('2.5%' = est_sample - 1.96 * bt_sd,
'97.5%' = est_sample + 1.96 * bt_sd)
}
# bootstrap p-value
bt_p_val <- function(est_sample, est_bt) {
# est_semple: sample estimate vector
# est_bt: bootstrap estimates with row = boot replications, col = coefficients
est_bt_long <- est_bt %>% data.frame() %>% gather()
est_bt_long <- est_bt_long %>%
mutate(
center_var = kronecker(ones(nrow(est_bt),1), matrix(est_sample, nrow=1)) %>% c(),
extremes = abs(value - center_var) >= abs(center_var)
)
p_val <- est_bt_long %>%
group_by(key) %>%
summarise(p_val = sum(extremes)/nrow(est_bt))
return(list(p_val=p_val, df_long=est_bt_long))
}
ci_ver1(rlt_model_e_bt)
2.5% 97.5%
Intercept 23.009607 32.923324
Time 6.546122 7.530369
Diet2.Time 1.030873 2.228717
Diet3.Time 3.145963 4.335258
Diet4.Time 2.258264 3.464281
ci_ver2(obs_model_e$estimate, bt_sd_e, model_e$df.residual)
2.5% 97.5%
Intercept 22.797384 32.920283
Time 6.547904 7.550418
Diet2.Time 1.013646 2.208772
Diet3.Time 3.139699 4.336934
Diet4.Time 2.258005 3.464870
bt_p_val(obs_model_e$estimate, rlt_model_e_bt)$p_val
# histogram visualization
coeff_bt_histogram <- function(est_sample, est_bt, centering =FALSE) {
# est_semple: sample estimate vector
# est_bt: bootstrap estimates with row = boot replications, col = coefficients
est_bt_long <- bt_p_val(est_sample, est_bt)$df_long
if (centering) {
est_bt_long <- est_bt_long %>%
mutate(
key = paste(key, " - center"),
value = value - center_var,
fill_var = extremes
)
legend_lab <- "Extremes: | value - center | > | center |"
x_lab <- "value - center"
} else {
est_bt_long <- est_bt_long %>%
mutate(
sign = kronecker(ones(nrow(est_bt),1), matrix(ifelse(est_sample>0,1,-1), nrow=1)) %>% c(),
fill_var = value * sign <= 0
)
legend_lab <- "Crossing zero?"
x_lab <- "value"
}
est_bt_long %>%
ggplot(aes(x = value, fill = fill_var)) +
geom_histogram(color = "white", bins = 40) + theme(legend.position="top") +
facet_wrap(~key, scales = "free") + labs(fill = legend_lab, x = x_lab)
}
coeff_bt_histogram(obs_model_e$estimate, rlt_model_e_bt, centering=FALSE)
# answer to b.
model_f <- lm( weight ~ Diet*Time - Diet + Chick, data = ChickWeight2)
rlt_model_f_bt <- run_ols_boot(model_f, num_do = 2000)
obs_model_f <- tidy(model_f) # summary of the original OLS estimates
bt_sd_f <- apply(rlt_model_f_bt, 2, sd) # calculate bootstrap standard errors
bt_mode1_f <- cbind(coeff = obs_model_f$estimate,
sd = bt_sd_f,
tstat = obs_model_f$estimate/bt_sd_f)
# OLS estimate with statistical inferences by statistic theory
obs_model_f
# OLS estimate with statistical inferences by bootstrapping
bt_mode1_f
coeff sd tstat
Intercept 28.2445096 2.0101624 14.05085945
Time 6.6906239 0.2550765 26.22987308
Chick.L 25.6466519 13.7280927 1.86818755
Chick.Q -13.5654294 12.9043379 -1.05123018
Chick.C 51.9445570 12.1471417 4.27627819
Chick.4 9.2225392 11.4673088 0.80424617
Chick.5 -33.3384388 10.4785461 -3.18159013
Chick.6 41.0951586 11.0416294 3.72183826
Chick.7 -0.2107096 8.9908055 -0.02343612
Chick.8 5.8962394 10.0282419 0.58796341
Chick.9 13.4344671 8.8796600 1.51294837
Chick.10 -16.1181057 8.8401925 -1.82327543
Chick.11 -45.0381766 8.2337109 -5.46997303
Chick.12 9.6177311 8.2622410 1.16405840
Chick.13 55.0050219 7.8952974 6.96680814
Chick.14 8.1414846 7.9014837 1.03037416
Chick.15 -45.9058209 7.7981094 -5.88678853
Chick.16 -15.9530106 7.8434220 -2.03393501
Chick.17 28.2004666 7.7343476 3.64613382
Chick.18 17.2879476 7.8393371 2.20528184
Chick.19 -24.3940107 7.5359549 -3.23701655
Chick.20 13.0230252 7.7382680 1.68293799
Chick.21 12.9951812 7.3245032 1.77420651
Chick.22 -10.4667014 7.8268992 -1.33727305
Chick.23 3.4996009 7.4351622 0.47068252
Chick.24 3.9263398 7.2880439 0.53873713
Chick.25 -35.2223376 7.2630198 -4.84954450
Chick.26 20.8034554 7.3867422 2.81632347
Chick.27 41.5753691 7.4293858 5.59607079
Chick.28 8.9788801 7.2295248 1.24197375
Chick.29 -22.6530019 7.3433735 -3.08482223
Chick.30 -7.0641070 7.4130965 -0.95292257
Chick.31 11.6522480 7.6610285 1.52097698
Chick.32 16.4033102 7.1333204 2.29953363
Chick.33 -3.1073624 7.2907980 -0.42620333
Chick.34 -6.9049827 7.3410777 -0.94059523
Chick.35 0.5744950 7.3226644 0.07845437
Chick.36 -16.0760278 7.4601073 -2.15493253
Chick.37 -4.2041629 7.3267193 -0.57381248
Chick.38 -12.9040587 7.4622197 -1.72925204
Chick.39 -11.6779774 7.4381642 -1.57000803
Chick.40 42.2805547 7.5638261 5.58983694
Chick.41 -13.5916736 7.3785671 -1.84204784
Chick.42 -32.8738896 7.3118819 -4.49595470
Chick.43 -38.4670120 7.4159112 -5.18709178
Chick.44 12.3700410 7.4217203 1.66673500
Chick.45 -20.2123257 7.5224253 -2.68694270
Chick.46 23.0112367 7.4130391 3.10415694
Chick.47 -11.3521235 7.4670371 -1.52029827
Chick.48 -16.5301765 7.3286593 -2.25555261
Chick.49 -31.1804687 7.2707222 -4.28849678
Diet2.Time 1.9185124 0.4256758 4.50698062
Diet3.Time 4.7322471 0.4370499 10.82770530
Diet4.Time 2.9653114 0.4281499 6.92587171
ci_ver1(rlt_model_f_bt)
2.5% 97.5%
Intercept 24.406129 32.1791412
Time 6.180458 7.2039834
Chick.L -1.095827 51.9876252
Chick.Q -39.410898 12.0533312
Chick.C 28.435280 75.9569679
Chick.4 -12.263702 32.2433648
Chick.5 -53.352961 -11.9837773
Chick.6 19.520197 62.8513114
Chick.7 -17.530390 17.1464547
Chick.8 -13.552160 25.5288624
Chick.9 -4.365051 30.9651758
Chick.10 -33.111715 1.0657483
Chick.11 -60.848178 -28.6157041
Chick.12 -7.084469 25.6153552
Chick.13 39.464157 70.3852062
Chick.14 -6.832961 23.8763005
Chick.15 -60.698155 -30.0756047
Chick.16 -31.679854 -1.3109406
Chick.17 12.536762 43.1507825
Chick.18 1.582073 32.1589229
Chick.19 -39.821428 -9.9563676
Chick.20 -2.796408 27.3960941
Chick.21 -0.499601 27.4401871
Chick.22 -25.264765 5.0832400
Chick.23 -10.371708 18.3871174
Chick.24 -10.190630 18.6732952
Chick.25 -49.249357 -21.3947896
Chick.26 5.825681 34.8663921
Chick.27 26.259602 56.1861348
Chick.28 -4.555098 23.1617984
Chick.29 -36.512718 -8.4498601
Chick.30 -21.626476 8.1298583
Chick.31 -2.908131 26.4618556
Chick.32 2.951163 30.6086721
Chick.33 -17.322510 11.3530794
Chick.34 -20.820942 7.9392127
Chick.35 -13.612160 15.1896418
Chick.36 -30.619066 -1.5679873
Chick.37 -18.207569 9.9308346
Chick.38 -27.938861 1.2603233
Chick.39 -26.259733 2.8791353
Chick.40 27.585414 56.8156234
Chick.41 -28.000815 0.9710493
Chick.42 -47.455518 -18.7221896
Chick.43 -52.965901 -23.9345338
Chick.44 -2.647379 27.0089861
Chick.45 -34.655970 -5.7875793
Chick.46 8.536233 38.2415722
Chick.47 -25.521500 4.1463967
Chick.48 -30.973309 -2.5058114
Chick.49 -45.670723 -17.6013062
Diet2.Time 1.088158 2.7748230
Diet3.Time 3.892416 5.6068913
Diet4.Time 2.134353 3.7896697
ci_ver2(obs_model_f$estimate, bt_sd_f, model_f$df.residual)
2.5% 97.5%
Intercept 24.304591 32.1844279
Time 6.190674 7.1905738
Chick.L -1.260410 52.5537137
Chick.Q -38.857932 11.7270728
Chick.C 28.136159 75.7529546
Chick.4 -13.253386 31.6984645
Chick.5 -53.876389 -12.8004885
Chick.6 19.453565 62.7367521
Chick.7 -17.832688 17.4112693
Chick.8 -13.759115 25.5515936
Chick.9 -3.969666 30.8386006
Chick.10 -33.444883 1.2086715
Chick.11 -61.176250 -28.9001032
Chick.12 -6.576261 25.8117235
Chick.13 39.530239 70.4798047
Chick.14 -7.345423 23.6283927
Chick.15 -61.190115 -30.6215265
Chick.16 -31.326118 -0.5799035
Chick.17 13.041145 43.3597880
Chick.18 1.922847 32.6530482
Chick.19 -39.164482 -9.6235391
Chick.20 -2.143980 28.1900306
Chick.21 -1.360845 27.3512075
Chick.22 -25.807424 4.8740211
Chick.23 -11.073317 18.0725189
Chick.24 -10.358226 18.2109058
Chick.25 -49.457856 -20.9868189
Chick.26 6.325441 35.2814700
Chick.27 27.013773 56.1369653
Chick.28 -5.190989 23.1487487
Chick.29 -37.046014 -8.2599898
Chick.30 -21.593776 7.4655621
Chick.31 -3.363368 26.6678638
Chick.32 2.422002 30.3846181
Chick.33 -17.397327 11.1826017
Chick.34 -21.293495 7.4835296
Chick.35 -13.777927 14.9269172
Chick.36 -30.697838 -1.4542176
Chick.37 -18.564533 10.1562069
Chick.38 -27.530009 1.7218920
Chick.39 -26.256779 2.9008243
Chick.40 27.455455 57.1056539
Chick.41 -28.053665 0.8703179
Chick.42 -47.205178 -18.5426012
Chick.43 -53.002198 -23.9318260
Chick.44 -2.176531 26.9166129
Chick.45 -34.956279 -5.4683721
Chick.46 8.481680 37.5407933
Chick.47 -25.987516 3.2832691
Chick.48 -30.894349 -2.1660044
Chick.49 -45.431084 -16.9298532
Diet2.Time 1.084188 2.7528369
Diet3.Time 3.875629 5.5888648
Diet4.Time 2.126138 3.8044853
bt_p_val(obs_model_f$estimate, rlt_model_f_bt)$p_val
obs_model_f_time <- obs_model_f %>% filter(grepl("Intercept", term) | grepl("Time", term))
obs_model_f_chick <- obs_model_f %>% filter(grepl("Chick", term))
rlt_model_f_bt_time <- rlt_model_f_bt %>% dplyr::select(contains("Intercept"), contains("Time"))
rlt_model_f_bt_chick <- rlt_model_f_bt %>% dplyr::select(contains("Chick"))
coeff_bt_histogram(obs_model_f_time$estimate, rlt_model_f_bt_time, centering=FALSE)
coeff_bt_histogram(obs_model_f_chick$estimate[1:6], rlt_model_f_bt_chick[,1:6], centering=FALSE)
model_g <- lmer( weight ~ Diet*Time - Diet + (1 | Chick), data = ChickWeight2)
getFormula <- function(ols, lmer=FALSE) gsub("()","", ifelse(!lmer, ols$call[2], ols@call[2]))
getFormula(model_g, lmer=TRUE)
[1] "weight ~ Diet * Time - Diet + (1 | Chick)"
getDependentVar <- function(ols, lmer=FALSE) {
str <- getFormula(ols, lmer=lmer)
gsub(" ","", substr(str, 1, (regexpr("~",str)[1]-1))) 
}
getDependentVar(model_g, lmer=TRUE)
[1] "weight"
run_lmer_boot <- function(lmer_rlt, num_do = 5000) {
# Randome effects (RE) model bootstrapping (random intercepts, not random slopes)
rlt <- list()
rlt$model <- model_g@pp$X # model data for the part of fixed coefficients
N <- length(residuals(lmer_rlt))
rlt$fitted_no_RE <- rlt$model %*% matrix(fixef(lmer_rlt), ncol=1)
RE_vals <- lmer_rlt@flist[[1]] %>% unique() # RE variable values
N_RE <- length(RE_vals) # number of RE variable values
rlt$RE_idx <- rep(NA, N)
for (i in 1:N_RE) rlt$RE_idx[which(lmer_rlt@flist[[1]] == RE_vals[i])] <- i # index of RE variable values
sd_res <- sigma(lmer_rlt) # standard deviation of the residuals
sd_RE <- lmer_rlt@theta * sd_res # standard deviation of RE
dep_var <- getDependentVar(lmer_rlt, lmer=TRUE)
do(num_do) *
({
data_bt <- data.frame(lmer_rlt@frame)
# replace the dependent variable with its bootstrap counterpart
data_bt[[dep_var]] <- rlt$fitted_no_RE + # the predicted component by fixed coefficients
+ rnorm(N_RE, mean = 0, sd = sd_RE)[rlt$RE_idx] + # random draws of tge RE distribution
+ rnorm(N, mean = 0, sd = sd_res) # random draws of the residual distribution
# run the RE model with the same formula but with a new, bootstrap dataset
lmer_bt <- lmer(as.formula(getFormula(lmer_rlt, lmer=TRUE)), data = data_bt)
sd_res_bt <- sigma(lmer_bt)
sd_RE_bt <- lmer_bt@theta * sd_res_bt
c(fixef(lmer_bt), sigma_RE = sd_RE_bt, sigma_res = sd_res_bt) # get fixed coefficients
})
}
# answer to c.
model_g <- lmer( weight ~ Diet*Time - Diet + (1 | Chick), data = ChickWeight2)
rlt_model_g_bt <- run_lmer_boot(model_g, num_do = 2000)
convergence code 3 from bobyqa: bobyqa -- a trust region step failed to reduce q
obs_model_g <- tidy(model_g) # summary of the original lmer estimates
bt_sd_g <- apply(rlt_model_g_bt, 2, sd) # calculate bootstrap standard errors
bt_mode1_g <- cbind(coeff = obs_model_g$estimate,
sd = bt_sd_g,
tstat = obs_model_g$estimate/bt_sd_g)
# random model estimate with statistical inferences by statistic theory (default)
obs_model_g
# random model estimate with statistical inferences by bootstrapping
bt_mode1_g
coeff sd tstat
Intercept 28.185242 3.8751389 7.273350
Time 6.772853 0.2449361 27.651508
Diet2.Time 1.844157 0.3827047 4.818746
Diet3.Time 4.475562 0.3900164 11.475315
Diet4.Time 2.941763 0.3953241 7.441395
sigma_RE 23.085561 2.6270454 8.787652
sigma_res 25.358103 0.7866630 32.235028
ci_ver1(rlt_model_g_bt)
2.5% 97.5%
Intercept 20.877070 36.074627
Time 6.274888 7.264347
Diet2.Time 1.090851 2.589714
Diet3.Time 3.713359 5.216869
Diet4.Time 2.172576 3.729817
sigma_RE 17.755749 28.111087
sigma_res 23.838300 26.869338
ci_ver2(obs_model_g$estimate, bt_sd_g, (nrow(ChickWeight2) - nrow(obs_model_g)))
2.5% 97.5%
Intercept 20.589970 35.780514
Time 6.292778 7.252928
Diet2.Time 1.094055 2.594258
Diet3.Time 3.711129 5.239994
Diet4.Time 2.166928 3.716598
sigma_RE 17.936552 28.234570
sigma_res 23.816244 26.899963
bt_p_val(obs_model_g$estimate, rlt_model_g_bt)$p_val
coeff_bt_histogram(obs_model_g$estimate, rlt_model_g_bt, centering=FALSE)